In [1]:
%matplotlib inline
from __future__ import division, print_function
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import matplotlib.pyplot as plt
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
import numpy as np
import time

In [155]:
mnist_dir = '../dat'
dat = read_data_sets(mnist_dir)
train_size = 100
sss = StratifiedShuffleSplit(dat.train.labels,train_size=train_size, 
                             test_size=dat.train.labels.shape[0]-train_size, n_iter=1, random_state = 1234)
i_tr, i_unlab = [i for i in sss][0]
Xtr = dat.train.images[i_tr]
ytr = dat.train.labels[i_tr]
Xunl = dat.train.images[i_unlab]
yunl = dat.train.labels[i_unlab]
Xval = dat.validation.images
yval = dat.validation.labels


Extracting ../dat/train-images-idx3-ubyte.gz
Extracting ../dat/train-labels-idx1-ubyte.gz
Extracting ../dat/t10k-images-idx3-ubyte.gz
Extracting ../dat/t10k-labels-idx1-ubyte.gz

Ordinary discriminative CNN using propagated labels


In [246]:
y_train = np.hstack((ytr, yprop))
y_train = np.eye(10)[y_train]
ytr_oh = np.eye(10)[ytr]
yprop_oh = np.eye(10)[yprop]

In [297]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import log_loss
from keras.regularizers import WeightRegularizer


def create_model(img_rows, img_cols,learning_rate, reg, decay, momentum=.9, dropout = .5):
    model = Sequential()
    model.add(Convolution2D(128, 5, 5, border_mode='same', init='he_normal', W_regularizer=WeightRegularizer(l1=reg),
                            input_shape=(1, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    model.add(Convolution2D(312, 3, 3, border_mode='same', init='he_normal', W_regularizer=WeightRegularizer(l1=reg)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    model.add(Convolution2D(172, 3, 3, border_mode='same', init='he_normal', W_regularizer=WeightRegularizer(l1=reg)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    model.add(Convolution2D(172, 3, 3, border_mode='same', init='he_normal', W_regularizer=WeightRegularizer(l1=reg)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))

    model.add(Flatten())
    model.add(Dense(48))
    
    model.add(Dense(10))
    model.add(Activation('softmax'))

    sgd = SGD(lr=learning_rate, decay=decay, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')
    return model

In [298]:
batch_size = 32
nb_epoch = 50
dropout = .60
reg = 1e-5
learning_rate = 1e-4
decay = 1e-5
momentum = .95
prop_weight = .1

sample_weight = np.ones((x_train.shape[0],))
sample_weight[train_size:] /= prop_weight

model = create_model(28, 28, learning_rate=learning_rate, reg=reg, decay = decay, dropout = dropout)

model.fit(x_train, np.vstack((ytr_oh, yprop_oh)), batch_size=batch_size, nb_epoch=nb_epoch,
      shuffle=True, verbose=2, validation_data=(x_test, y_test),
      callbacks=[EarlyStopping(monitor='val_loss', patience=4, verbose=0)],
            sample_weight=sample_weight)

predictions_valid = model.predict(x_test, verbose=1)

print('Accuracy: {}'.format((y_test.argmax(axis=1)==predictions_valid.argmax(axis=1)).mean()))


Train on 55000 samples, validate on 5000 samples
Epoch 1/50
65s - loss: 20.2227 - val_loss: 1.1938
Epoch 2/50
64s - loss: 10.9600 - val_loss: 0.7252
Epoch 3/50
65s - loss: 8.5136 - val_loss: 0.5277
Epoch 4/50
64s - loss: 7.0776 - val_loss: 0.4190
Epoch 5/50
63s - loss: 6.2268 - val_loss: 0.3740
Epoch 6/50
63s - loss: 5.6243 - val_loss: 0.3132
Epoch 7/50
63s - loss: 5.1920 - val_loss: 0.3072
Epoch 8/50
63s - loss: 4.8922 - val_loss: 0.2787
Epoch 9/50
63s - loss: 4.6581 - val_loss: 0.2652
Epoch 10/50
63s - loss: 4.4685 - val_loss: 0.2587
Epoch 11/50
63s - loss: 4.2871 - val_loss: 0.2638
Epoch 12/50
63s - loss: 4.2007 - val_loss: 0.2552
Epoch 13/50
63s - loss: 4.0642 - val_loss: 0.2361
Epoch 14/50
63s - loss: 3.9798 - val_loss: 0.2329
Epoch 15/50
63s - loss: 3.8743 - val_loss: 0.2433
Epoch 16/50
63s - loss: 3.8263 - val_loss: 0.2387
Epoch 17/50
63s - loss: 3.7826 - val_loss: 0.2262
Epoch 18/50
63s - loss: 3.6875 - val_loss: 0.2337
Epoch 19/50
63s - loss: 3.6420 - val_loss: 0.2553
Epoch 20/50
63s - loss: 3.5835 - val_loss: 0.2302
Epoch 21/50
63s - loss: 3.5612 - val_loss: 0.2361
Epoch 22/50
63s - loss: 3.5293 - val_loss: 0.2361
5000/5000 [==============================] - 2s     
Accuracy: 0.929

In [308]:
#doublecheck on holdout set
x_ho = dat.test.images.reshape(dat.test.images.shape[0],1,28,28)
predictions_test = model.predict(x_ho)
print('Accuracy: {}'.format((dat.test.labels==predictions_test.argmax(axis=1)).mean()))


Accuracy: 0.9398